{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "45c7f533-5a95-4f88-99dd-b04fb96b24ae",
   "metadata": {},
   "source": [
    "## Embed_documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65d0ba3b-15a0-4228-bc5a-5723e1913d55",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_openai import OpenAIEmbeddings\n",
    "\n",
    "e_model = OpenAIEmbeddings()\n",
    "\n",
    "# 对指定文本进行向量化\n",
    "ebeddings = e_model.embed_documents(\n",
    "     [\n",
    "        \"你好\",\n",
    "        \"你好啊\",\n",
    "        \"你叫什么名字?\",\n",
    "        \"我叫王大锤\",\n",
    "        \"很高兴认识你大锤\",\n",
    "    ]\n",
    ")\n",
    "\n",
    "ebeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "164ac68d-2656-40e0-995a-72cb9223c7c8",
   "metadata": {},
   "source": [
    "## Embed_query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "32456f31-00ff-4653-a5a2-fd8e2f6fd05a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 对查询内容进行向量化\n",
    "embedded_query = e_model.embed_query(\"这段对话中提到了什么名字?\")\n",
    "embedded_query[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b0d5287d-3bbd-4d2e-81d7-34d513f95b1c",
   "metadata": {},
   "source": [
    "## 嵌入向量缓存"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d350f61-83d6-4353-917a-dc5c35ae087c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.embeddings import CacheBackedEmbeddings\n",
    "from langchain.storage import  LocalFileStore\n",
    "from langchain.document_loaders import TextLoader\n",
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "from langchain_openai import OpenAIEmbeddings\n",
    "\n",
    "u_embeddings = OpenAIEmbeddings()\n",
    "# 指定缓存文件的目录\n",
    "fs = LocalFileStore(\"./cache/\")\n",
    "cached_embeddings = CacheBackedEmbeddings.from_bytes_store(\n",
    "    u_embeddings,\n",
    "    fs,\n",
    "    namespace=u_embeddings.model,\n",
    ")\n",
    "list(fs.yield_keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "426ebdab-29aa-4604-a8df-bc56d7c9cce6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 加载文档，切分文档\n",
    "raw_documents = TextLoader(\"letter.txt\").load()\n",
    "text_splitter = CharacterTextSplitter(chunk_size=600,chunk_overlap=0)\n",
    "documents = text_splitter.split_documents(raw_documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e0ed863-28ec-4e75-b153-ebee8f26ebdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "! pip install faiss-cup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3e2f0544-00e7-45cb-b710-2700623f8d40",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 将切分文档向量化病存储在缓存中\n",
    "from langchain.vectorstores import FAISS\n",
    "%timeit -r  1 -n 1 db = FAISS.from_documents(documents, cached_embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb0b3c0f-6a04-45de-a81c-1cfe2dc1460b",
   "metadata": {},
   "outputs": [],
   "source": [
    "#查看缓存中的键\n",
    "list(fs.yield_keys())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (langchain)",
   "language": "python",
   "name": "langchain-env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
